R Markdown
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.0 ✔ purrr 0.3.2
## ✔ tibble 2.1.3 ✔ dplyr 1.0.2
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## Warning: package 'dplyr' was built under R version 3.6.2
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
SIAP <- read.csv("/Users/erikaluna/R\ Studio/msc_thesis/SIAP.csv")
maize_irri_state <- SIAP %>%
filter(state %in% c("baja california","baja california sur", "chihuahua", "durango", "nayarit", "sinaloa", "sonora"), year < 2002, crop == "maiz", water == "irrigated") %>%
select(year, state, water, yield, losses)
#maize_irri_state %>%
#DT::datatable()
maize_irri_mun <- SIAP %>%
filter(state %in% c("baja california","baja california sur", "chihuahua", "durango", "nayarit", "sinaloa", "sonora"), year > 2001, crop == "maiz", water == "irrigated") %>%
select(year, state, water, yield, losses)
maize_irri_mun %>%
DT::datatable()
maize_rain_state <- SIAP %>%
filter(state %in% c("baja california","baja california sur", "chihuahua", "durango", "nayarit", "sinaloa", "sonora"), year < 2002, crop == "maiz", water == "rainfed") %>%
select(year, state, water, yield, losses)
#maize_rain_state %>%
#DT::datatable()
maize_rain_mun <- SIAP %>%
filter(state %in% c("baja california","baja california sur", "chihuahua", "durango", "nayarit", "sinaloa", "sonora"), year > 2001, crop == "maiz", water == "rainfed") %>%
select(year, state, water, yield, losses)
maize_rain_mun %>%
DT::datatable()
maize_water_state <- bind_rows(maize_irri_state, maize_rain_state)
maize_water_mun <- bind_rows(maize_irri_mun, maize_rain_mun)
maize_water_state %>%
ggplot(aes(water, yield)) +
geom_boxplot() +
scale_y_continuous("Yield (ha)", trans = "log10") +
ylab("Yield (ha)") +
xlab("Water mode") +
ggtitle("Maize State level")
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 28 rows containing non-finite values (stat_boxplot).

#scale_y_continuous(labels = comma) +
#theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust= 0.5))
maize_water_mun %>%
ggplot(aes(water, yield)) +
geom_boxplot() +
scale_y_continuous("Yield (ha)", trans = "log10") +
ylab("Yield (ha)") +
xlab("Water mode") +
ggtitle("Maize Municipal level")
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 264 rows containing non-finite values (stat_boxplot).

#scale_y_continuous(labels = comma) +
#theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust= 0.5))
one_crop <- SIAP %>%
filter(crop == "maiz") %>%
group_by(year, state) %>%
summarise(ag_yield = round(sum(production)/sum(harvested), digits = 2),
ag_prod = sum(production),
ag_planted = sum(planted),
ag_harv = sum(harvested),
ag_losses = sum(losses))
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
one_crop %>%
DT::datatable()
Maiz national
maize_nat <- SIAP %>%
filter(state %in% c("baja california","baja california sur", "chihuahua", "durango", "nayarit", "sinaloa", "sonora"), crop == "maiz") %>%
group_by(year) %>%
summarise(ag_yield = round(sum(production)/sum(harvested), digits = 2),
ag_prod = sum(production),
ag_planted = sum(planted),
ag_harv = sum(harvested),
ag_losses = sum(losses))
## `summarise()` ungrouping output (override with `.groups` argument)
maize_nat %>%
DT::datatable()
Number of observations
## `summarise()` ungrouping output (override with `.groups` argument)
## Panel data A data frama with all years and all states that grow maize.
period <- tibble(rep(c(1980:2016), times = 32)) #32 states report maize production
colnames(period) <- c("year")
states <- tibble(rep(c("aguascalientes","baja california","baja california sur",
"campeche", "chiapas", "chihuahua","coahuila","colima",
"distrito federal","durango",
"guanajuato", "guerrero", "hidalgo", "jalisco",
"mexico", "michoacan", "morelos", "nayarit","nuevo leon",
"oaxaca", "puebla", "queretaro", "quintana roo",
"san luis potosi", "sinaloa", "sonora", "tabasco",
"tamaulipas", "tlaxcala","veracruz", "yucatan", "zacatecas"), times = 37))
colnames(states) <- c("state")
states <- states %>%
arrange(state)
states_period <- cbind(states, period)
Data frame for Maize
maize <- left_join(states_period, one_crop, by=c("state", "year"))
maize <- maize %>%
transform(i=as.numeric(factor(state))) %>%
transform(t=as.numeric(factor(year))) %>%
group_by(year) %>%
arrange(state)
maize %>%
DT::datatable()
Descriptive Statistics
summary(maize)
## state year ag_yield ag_prod
## Length:1184 Min. :1980 Min. : 0.260 Min. : 25
## Class :character 1st Qu.:1989 1st Qu.: 1.670 1st Qu.: 105683
## Mode :character Median :1998 Median : 2.485 Median : 359930
## Mean :1998 Mean : 3.484 Mean : 737542
## 3rd Qu.:2007 3rd Qu.: 4.320 3rd Qu.: 979578
## Max. :2016 Max. :39.870 Max. :8337076
## NA's :68 NA's :68
## ag_planted ag_harv ag_losses i
## Min. : 6.5 Min. : 6.5 Min. : 0 Min. : 1.00
## 1st Qu.: 61926.8 1st Qu.: 46913.2 1st Qu.: 2682 1st Qu.: 8.75
## Median :162533.1 Median :138180.0 Median : 16725 Median :16.50
## Mean :263319.8 Mean :231465.7 Mean : 32226 Mean :16.50
## 3rd Qu.:465139.0 3rd Qu.:405935.0 3rd Qu.: 44643 3rd Qu.:24.25
## Max. :988367.2 Max. :960143.8 Max. :416498 Max. :32.00
## NA's :66 NA's :67 NA's :83
## t
## Min. : 1
## 1st Qu.:10
## Median :19
## Mean :19
## 3rd Qu.:28
## Max. :37
##
ggplot(maize) +
aes(x = ag_yield) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 68 rows containing non-finite values (stat_bin).

ggplot(maize) +
aes(x = ag_yield) +
geom_density()
## Warning: Removed 68 rows containing non-finite values (stat_density).

maize %>%
group_by(state) %>%
summarize(max_yield = max(ag_yield, na.rm=TRUE),
min_yield = min(ag_yield, na.rm=TRUE),
range_yield = max(ag_yield, na.rm=TRUE) - min(ag_yield, na.rm=TRUE),
sd_yield = sd(ag_yield, na.rm=TRUE),
mean_yield = mean(ag_yield, na.rm=TRUE),
median_yield = median(ag_yield, na.rm=TRUE)) %>%
knitr::kable()
## `summarise()` ungrouping output (override with `.groups` argument)
| aguascalientes |
39.87 |
2.00 |
37.87 |
7.2798377 |
11.8245714 |
10.59 |
| baja california |
38.54 |
0.98 |
37.56 |
8.5605573 |
7.2329032 |
3.65 |
| baja california sur |
7.41 |
2.64 |
4.77 |
1.3394085 |
5.2271429 |
5.24 |
| campeche |
2.74 |
0.58 |
2.16 |
0.5573117 |
1.5691429 |
1.56 |
| chiapas |
2.56 |
1.45 |
1.11 |
0.2942522 |
1.9654286 |
1.90 |
| chihuahua |
9.97 |
0.57 |
9.40 |
2.4569107 |
5.1268571 |
4.85 |
| coahuila |
18.86 |
1.07 |
17.79 |
3.7133300 |
5.1442857 |
4.61 |
| colima |
10.51 |
1.76 |
8.75 |
2.1389538 |
4.0705714 |
3.12 |
| distrito federal |
11.00 |
1.97 |
9.03 |
1.4986115 |
2.7760000 |
2.48 |
| durango |
12.23 |
1.14 |
11.09 |
2.4056917 |
3.8565714 |
3.48 |
| guanajuato |
5.90 |
1.56 |
4.34 |
1.3113085 |
3.1977143 |
2.76 |
| guerrero |
3.06 |
0.97 |
2.09 |
0.4969693 |
2.2025714 |
2.26 |
| hidalgo |
3.46 |
1.56 |
1.90 |
0.5767255 |
2.2722857 |
2.21 |
| jalisco |
10.60 |
3.16 |
7.44 |
2.0650197 |
6.1022857 |
5.63 |
| mexico |
6.51 |
2.55 |
3.96 |
1.0881238 |
4.3457143 |
4.56 |
| michoacan |
4.15 |
1.46 |
2.69 |
0.7386203 |
2.5660000 |
2.31 |
| morelos |
3.50 |
0.91 |
2.59 |
0.7066578 |
2.3077143 |
2.10 |
| nayarit |
8.78 |
1.91 |
6.87 |
1.6842245 |
3.7422857 |
2.95 |
| nuevo leon |
11.06 |
1.00 |
10.06 |
2.2887925 |
3.3737143 |
2.64 |
| oaxaca |
1.54 |
0.92 |
0.62 |
0.1389323 |
1.2651429 |
1.26 |
| puebla |
2.79 |
1.13 |
1.66 |
0.3887487 |
1.9354286 |
1.99 |
| queretaro |
15.83 |
0.95 |
14.88 |
3.1923000 |
4.7117143 |
3.86 |
| quintana roo |
0.99 |
0.26 |
0.73 |
0.1616054 |
0.6131429 |
0.60 |
| san luis potosi |
1.75 |
0.81 |
0.94 |
0.2563096 |
1.2337143 |
1.18 |
| sinaloa |
10.54 |
1.03 |
9.51 |
3.0795467 |
5.7425714 |
6.14 |
| sonora |
7.05 |
2.77 |
4.28 |
1.1458762 |
4.6897143 |
4.69 |
| tabasco |
2.35 |
1.19 |
1.16 |
0.1957906 |
1.6331429 |
1.62 |
| tamaulipas |
4.94 |
1.56 |
3.38 |
0.9109864 |
2.9554286 |
2.74 |
| tlaxcala |
5.66 |
1.69 |
3.97 |
1.3930423 |
3.4051429 |
2.69 |
| veracruz |
2.44 |
1.47 |
0.97 |
0.2583350 |
1.8357143 |
1.84 |
| yucatan |
1.71 |
0.49 |
1.22 |
0.1963811 |
0.8885714 |
0.89 |
| zacatecas |
8.98 |
0.74 |
8.24 |
1.7751668 |
2.0965714 |
1.34 |
Plots
Production
maize %>%
group_by(state) %>%
summarise(max_prod = max(ag_prod, na.rm=T),
min_prod = min(ag_prod, na.rm=T),
range_prod = max(ag_prod, na.rm=T) - min(ag_prod, na.rm=T),
sd_prod = sd(ag_prod, na.rm=T),
mean_prod = mean(ag_prod, na.rm=T),
median_prod = median(ag_prod, na.rm=T)) %>%
knitr::kable()
## `summarise()` ungrouping output (override with `.groups` argument)
| aguascalientes |
1493778.03 |
120108.00 |
1373670.03 |
421743.447 |
752373.75 |
715440.57 |
| baja california |
68442.00 |
25.00 |
68417.00 |
16092.783 |
18277.56 |
13658.00 |
| baja california sur |
97642.00 |
6634.00 |
91008.00 |
27614.561 |
37919.32 |
31540.65 |
| campeche |
464714.94 |
12839.00 |
451875.94 |
135606.273 |
175362.01 |
133041.15 |
| chiapas |
2135550.08 |
983415.00 |
1152135.08 |
282788.096 |
1461322.34 |
1460524.00 |
| chihuahua |
2319376.63 |
238326.00 |
2081050.63 |
550629.484 |
1211653.85 |
1125302.76 |
| coahuila |
937497.77 |
21696.96 |
915800.81 |
193974.135 |
184256.92 |
132786.00 |
| colima |
145185.85 |
46313.30 |
98872.55 |
21054.258 |
83773.46 |
77459.00 |
| distrito federal |
50161.00 |
9515.15 |
40645.85 |
9107.056 |
22218.15 |
23383.00 |
| durango |
2773265.28 |
194100.43 |
2579164.85 |
530611.100 |
689347.17 |
496439.00 |
| guanajuato |
2337499.61 |
383021.00 |
1954478.61 |
472100.685 |
1033940.03 |
897201.00 |
| guerrero |
1428121.17 |
331411.00 |
1096710.17 |
269719.455 |
1017910.16 |
1038965.35 |
| hidalgo |
879327.76 |
233929.00 |
645398.76 |
154093.490 |
519376.35 |
513798.00 |
| jalisco |
8337075.94 |
2420683.00 |
5916392.94 |
1453557.271 |
4451739.90 |
4053390.00 |
| mexico |
3610125.45 |
1037491.00 |
2572634.45 |
574154.477 |
2602864.79 |
2600755.36 |
| michoacan |
1935286.73 |
606327.00 |
1328959.73 |
340786.696 |
1173786.27 |
1130893.00 |
| morelos |
122714.05 |
29214.00 |
93500.05 |
20841.622 |
85761.65 |
90722.50 |
| nayarit |
439237.43 |
103011.00 |
336226.43 |
78919.814 |
219908.24 |
208767.21 |
| nuevo leon |
880313.40 |
30372.90 |
849940.50 |
176381.775 |
181841.54 |
128472.00 |
| oaxaca |
851011.00 |
220535.00 |
630476.00 |
164203.079 |
607910.18 |
645255.35 |
| puebla |
1417706.20 |
522662.00 |
895044.20 |
227714.433 |
1005639.22 |
1074593.00 |
| queretaro |
1035148.30 |
58909.00 |
976239.30 |
321939.078 |
423422.56 |
335106.00 |
| quintana roo |
67469.92 |
4160.32 |
63309.60 |
16919.277 |
31771.94 |
33769.56 |
| san luis potosi |
256366.00 |
68751.00 |
187615.00 |
39217.713 |
167479.30 |
172358.00 |
| sinaloa |
6462324.35 |
64754.00 |
6397570.35 |
1910634.563 |
2297744.80 |
2449096.00 |
| sonora |
841582.00 |
46513.00 |
795069.00 |
179725.857 |
235612.83 |
179494.74 |
| tabasco |
181556.66 |
53710.00 |
127846.66 |
29035.967 |
106481.81 |
104467.40 |
| tamaulipas |
1356220.00 |
153360.81 |
1202859.19 |
285901.402 |
612883.77 |
557779.01 |
| tlaxcala |
733642.80 |
134414.00 |
599228.80 |
168570.419 |
441059.10 |
407526.00 |
| veracruz |
1360234.55 |
569513.00 |
790721.55 |
222325.058 |
967546.76 |
966462.62 |
| yucatan |
160737.44 |
8924.00 |
151813.44 |
40031.634 |
109519.68 |
120614.85 |
| zacatecas |
2689221.34 |
221965.00 |
2467256.34 |
508583.271 |
588443.59 |
389535.00 |
maize %>%
ggplot(aes(state, ag_prod)) +
geom_boxplot() +
ylab("Production (tonnes)") +
xlab("State") +
#scale_y_continuous(labels = comma) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust= 0.5))
## Warning: Removed 68 rows containing non-finite values (stat_boxplot).

number_obs <- maize %>%
group_by(state) %>%
summarise(obs = sum(!is.na(ag_prod)))
## `summarise()` ungrouping output (override with `.groups` argument)
maize_complete <- number_obs %>%
filter(obs > 34)
maize_complete
## # A tibble: 31 x 2
## state obs
## <chr> <int>
## 1 aguascalientes 35
## 2 baja california sur 35
## 3 campeche 35
## 4 chiapas 35
## 5 chihuahua 35
## 6 coahuila 35
## 7 colima 35
## 8 distrito federal 35
## 9 durango 35
## 10 guanajuato 35
## # … with 21 more rows
maize_ts <- maize %>%
ggplot(aes(year, ag_prod)) +
geom_line()+
ylab("Production (tonnes)") +
xlab("Years") +
ggtitle("maize Production 1980 - 2016") +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
geom_rect(data = subset(maize, state %in% c(maize_complete$state)),
fill = NA, colour = "red", xmin = -Inf,xmax = Inf,
ymin = -Inf,ymax = Inf) +
facet_wrap(~state, scales="free_y", ncol=5)
#facet_wrap(~state, ncol=5)
maize_ts

Yield
maize %>%
group_by(state) %>%
summarise(max_yield = max(ag_yield, na.rm=T),
min_yield = min(ag_yield, na.rm=T),
range_yield = max(ag_yield, na.rm=T) - min(ag_yield, na.rm=T),
sd_yield = sd(ag_yield, na.rm=T),
mean_yield = mean(ag_yield, na.rm=T),
median_yield = median(ag_yield, na.rm=T)) %>%
knitr::kable()
## `summarise()` ungrouping output (override with `.groups` argument)
| aguascalientes |
39.87 |
2.00 |
37.87 |
7.2798377 |
11.8245714 |
10.59 |
| baja california |
38.54 |
0.98 |
37.56 |
8.5605573 |
7.2329032 |
3.65 |
| baja california sur |
7.41 |
2.64 |
4.77 |
1.3394085 |
5.2271429 |
5.24 |
| campeche |
2.74 |
0.58 |
2.16 |
0.5573117 |
1.5691429 |
1.56 |
| chiapas |
2.56 |
1.45 |
1.11 |
0.2942522 |
1.9654286 |
1.90 |
| chihuahua |
9.97 |
0.57 |
9.40 |
2.4569107 |
5.1268571 |
4.85 |
| coahuila |
18.86 |
1.07 |
17.79 |
3.7133300 |
5.1442857 |
4.61 |
| colima |
10.51 |
1.76 |
8.75 |
2.1389538 |
4.0705714 |
3.12 |
| distrito federal |
11.00 |
1.97 |
9.03 |
1.4986115 |
2.7760000 |
2.48 |
| durango |
12.23 |
1.14 |
11.09 |
2.4056917 |
3.8565714 |
3.48 |
| guanajuato |
5.90 |
1.56 |
4.34 |
1.3113085 |
3.1977143 |
2.76 |
| guerrero |
3.06 |
0.97 |
2.09 |
0.4969693 |
2.2025714 |
2.26 |
| hidalgo |
3.46 |
1.56 |
1.90 |
0.5767255 |
2.2722857 |
2.21 |
| jalisco |
10.60 |
3.16 |
7.44 |
2.0650197 |
6.1022857 |
5.63 |
| mexico |
6.51 |
2.55 |
3.96 |
1.0881238 |
4.3457143 |
4.56 |
| michoacan |
4.15 |
1.46 |
2.69 |
0.7386203 |
2.5660000 |
2.31 |
| morelos |
3.50 |
0.91 |
2.59 |
0.7066578 |
2.3077143 |
2.10 |
| nayarit |
8.78 |
1.91 |
6.87 |
1.6842245 |
3.7422857 |
2.95 |
| nuevo leon |
11.06 |
1.00 |
10.06 |
2.2887925 |
3.3737143 |
2.64 |
| oaxaca |
1.54 |
0.92 |
0.62 |
0.1389323 |
1.2651429 |
1.26 |
| puebla |
2.79 |
1.13 |
1.66 |
0.3887487 |
1.9354286 |
1.99 |
| queretaro |
15.83 |
0.95 |
14.88 |
3.1923000 |
4.7117143 |
3.86 |
| quintana roo |
0.99 |
0.26 |
0.73 |
0.1616054 |
0.6131429 |
0.60 |
| san luis potosi |
1.75 |
0.81 |
0.94 |
0.2563096 |
1.2337143 |
1.18 |
| sinaloa |
10.54 |
1.03 |
9.51 |
3.0795467 |
5.7425714 |
6.14 |
| sonora |
7.05 |
2.77 |
4.28 |
1.1458762 |
4.6897143 |
4.69 |
| tabasco |
2.35 |
1.19 |
1.16 |
0.1957906 |
1.6331429 |
1.62 |
| tamaulipas |
4.94 |
1.56 |
3.38 |
0.9109864 |
2.9554286 |
2.74 |
| tlaxcala |
5.66 |
1.69 |
3.97 |
1.3930423 |
3.4051429 |
2.69 |
| veracruz |
2.44 |
1.47 |
0.97 |
0.2583350 |
1.8357143 |
1.84 |
| yucatan |
1.71 |
0.49 |
1.22 |
0.1963811 |
0.8885714 |
0.89 |
| zacatecas |
8.98 |
0.74 |
8.24 |
1.7751668 |
2.0965714 |
1.34 |
maize %>%
ggplot(aes(state, ag_yield)) +
geom_boxplot() +
ylab("Yield (tonnes/ha)") +
xlab("State") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust= 0.5))
## Warning: Removed 68 rows containing non-finite values (stat_boxplot).

number_obs <- maize %>%
group_by(state) %>%
summarise(obs = sum(!is.na(ag_yield)))
## `summarise()` ungrouping output (override with `.groups` argument)
maize_complete <- number_obs %>%
filter(obs > 34)
maize_complete
## # A tibble: 31 x 2
## state obs
## <chr> <int>
## 1 aguascalientes 35
## 2 baja california sur 35
## 3 campeche 35
## 4 chiapas 35
## 5 chihuahua 35
## 6 coahuila 35
## 7 colima 35
## 8 distrito federal 35
## 9 durango 35
## 10 guanajuato 35
## # … with 21 more rows
maize_ts <- maize %>%
ggplot(aes(year, ag_yield)) +
geom_line()+
ylab("Yield (tonnes/ha)") +
xlab("Years") +
ggtitle("Maize Yields 1980 - 2016") +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
geom_rect(data = subset(maize, state %in% c(maize_complete$state)),
fill = NA, colour = "red", xmin = -Inf,xmax = Inf,
ymin = -Inf,ymax = Inf) +
facet_wrap(~state, scales="free_y", ncol=5)
#facet_wrap(~state, ncol=5)
maize_ts

Area
maize %>%
group_by(state) %>%
summarise(max_area = max(ag_harv, na.rm=T),
min_area = min(ag_harv, na.rm=T),
range_area = max(ag_harv, na.rm=T) - min(ag_harv, na.rm=T),
sd_area = sd(ag_harv, na.rm=T),
mean_area = mean(ag_harv, na.rm=T),
median_area = median(ag_harv, na.rm=T)) %>%
knitr::kable()
## `summarise()` ungrouping output (override with `.groups` argument)
| aguascalientes |
107831.0 |
11862.00 |
95969.00 |
28937.729 |
68341.029 |
66869.0 |
| baja california |
18469.0 |
6.50 |
18462.50 |
4305.230 |
3838.984 |
2510.0 |
| baja california sur |
21908.0 |
1550.00 |
20358.00 |
6358.061 |
7633.316 |
4981.0 |
| campeche |
186293.0 |
22196.00 |
164097.00 |
50751.509 |
97657.640 |
104431.0 |
| chiapas |
960143.8 |
504332.00 |
455811.84 |
120143.243 |
746915.560 |
702700.0 |
| chihuahua |
467063.0 |
78410.00 |
388653.00 |
89282.790 |
257589.847 |
259377.0 |
| coahuila |
54916.0 |
14750.55 |
40165.45 |
11165.860 |
33169.454 |
32642.0 |
| colima |
43703.0 |
11920.00 |
31783.00 |
10029.121 |
24591.167 |
24109.0 |
| distrito federal |
14384.0 |
2160.00 |
12224.00 |
3184.903 |
8485.669 |
8136.0 |
| durango |
235521.0 |
94009.76 |
141511.24 |
36710.573 |
177580.652 |
181708.0 |
| guanajuato |
455055.6 |
147933.00 |
307122.58 |
77625.786 |
327466.658 |
336557.0 |
| guerrero |
513566.0 |
341752.00 |
171814.00 |
41750.341 |
457701.783 |
466473.0 |
| hidalgo |
280490.0 |
105952.00 |
174538.00 |
32671.741 |
228483.923 |
233995.2 |
| jalisco |
916264.0 |
590515.49 |
325748.51 |
68458.874 |
738114.661 |
748010.7 |
| mexico |
746816.0 |
360047.60 |
386768.40 |
80984.400 |
607378.266 |
605970.5 |
| michoacan |
561059.0 |
355787.48 |
205271.52 |
41065.500 |
458596.557 |
464983.0 |
| morelos |
57820.0 |
19452.00 |
38368.00 |
10091.074 |
38934.708 |
39360.0 |
| nayarit |
107741.0 |
43213.25 |
64527.75 |
14931.730 |
61620.539 |
57382.0 |
| nuevo leon |
94674.3 |
12611.80 |
82062.50 |
20017.335 |
52610.726 |
54303.0 |
| oaxaca |
598942.5 |
239930.00 |
359012.52 |
92999.724 |
473285.145 |
485977.3 |
| puebla |
629374.0 |
309824.31 |
319549.69 |
75510.784 |
521914.969 |
541182.0 |
| queretaro |
123355.9 |
43143.00 |
80212.90 |
23555.979 |
86984.931 |
88259.0 |
| quintana roo |
85575.0 |
9660.00 |
75915.00 |
20425.422 |
49413.715 |
49868.0 |
| san luis potosi |
218808.1 |
46456.00 |
172352.15 |
47219.335 |
143351.988 |
146118.0 |
| sinaloa |
613197.3 |
46119.00 |
567078.29 |
180695.804 |
311682.077 |
363936.0 |
| sonora |
172528.0 |
15090.00 |
157438.00 |
38179.905 |
51395.407 |
35458.0 |
| tabasco |
105023.0 |
30880.00 |
74143.00 |
18947.972 |
66039.302 |
67377.0 |
| tamaulipas |
504704.0 |
98155.25 |
406548.75 |
100911.694 |
213186.951 |
198205.0 |
| tlaxcala |
161104.0 |
73478.00 |
87626.00 |
18408.116 |
131840.288 |
131124.0 |
| veracruz |
647535.4 |
338706.00 |
308829.40 |
74599.616 |
522976.031 |
536845.0 |
| yucatan |
166785.0 |
13128.00 |
153657.00 |
40496.663 |
123131.349 |
139404.0 |
| zacatecas |
442043.0 |
101513.60 |
340529.40 |
79128.893 |
295477.558 |
297417.5 |
maize %>%
ggplot(aes(state, ag_harv)) +
geom_boxplot() +
ylab("Area (tonnes)") +
xlab("State") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust= 0.5))
## Warning: Removed 67 rows containing non-finite values (stat_boxplot).

number_obs <- maize %>%
group_by(state) %>%
summarise(obs = sum(!is.na(ag_harv)))
## `summarise()` ungrouping output (override with `.groups` argument)
maize_complete <- number_obs %>%
filter(obs > 34)
maize_complete
## # A tibble: 31 x 2
## state obs
## <chr> <int>
## 1 aguascalientes 35
## 2 baja california sur 35
## 3 campeche 35
## 4 chiapas 35
## 5 chihuahua 35
## 6 coahuila 35
## 7 colima 35
## 8 distrito federal 35
## 9 durango 35
## 10 guanajuato 35
## # … with 21 more rows
maize_ts <- maize %>%
ggplot(aes(year, ag_harv)) +
geom_line()+
ylab("Area harvested (ha)") +
xlab("Years") +
ggtitle("maize - Area Harvested (ha) 1980 - 2016") +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
geom_rect(data = subset(maize, state %in% c(maize_complete$state)),
fill = NA, colour = "red", xmin = -Inf,xmax = Inf,
ymin = -Inf,ymax = Inf) +
facet_wrap(~state, scales="free_y", ncol=5)
#facet_wrap(~state, ncol=5)
maize_ts

Losses
maize %>%
group_by(state) %>%
summarise(max_losses = max(ag_losses, na.rm=T),
min_losses = min(ag_losses, na.rm=T),
range_losses = max(ag_losses, na.rm=T) - min(ag_losses, na.rm=T),
sd_losses = sd(ag_losses, na.rm=T),
mean_losses = mean(ag_losses, na.rm=T),
median_losses = median(ag_losses, na.rm=T)) %>%
knitr::kable()
## `summarise()` ungrouping output (override with `.groups` argument)
| aguascalientes |
98179.0 |
838.0 |
97341.0 |
26988.5843 |
34821.7794 |
32473.500 |
| baja california |
3937.0 |
0.0 |
3937.0 |
874.2048 |
652.6129 |
327.000 |
| baja california sur |
4479.0 |
0.0 |
4479.0 |
758.1018 |
511.7162 |
295.750 |
| campeche |
102704.0 |
603.0 |
102101.0 |
24488.9392 |
20843.0917 |
11390.000 |
| chiapas |
120142.0 |
0.0 |
120142.0 |
30016.9511 |
28319.8903 |
19373.000 |
| chihuahua |
120423.0 |
0.0 |
120423.0 |
37834.4437 |
43595.8477 |
31065.000 |
| coahuila |
26530.0 |
4.5 |
26525.5 |
7538.9236 |
12510.2131 |
12368.000 |
| colima |
11297.0 |
0.0 |
11297.0 |
2560.0987 |
1852.5429 |
849.500 |
| distrito federal |
3083.0 |
0.0 |
3083.0 |
633.4964 |
334.8771 |
21.500 |
| durango |
116948.0 |
734.0 |
116214.0 |
30675.0632 |
29733.5677 |
16725.000 |
| guanajuato |
315239.0 |
5352.0 |
309887.0 |
70134.1731 |
94967.8444 |
68616.000 |
| guerrero |
118839.0 |
1971.0 |
116868.0 |
27630.4202 |
20315.9821 |
10258.250 |
| hidalgo |
147952.0 |
4178.9 |
143773.1 |
34477.2629 |
43952.7397 |
33819.850 |
| jalisco |
201237.0 |
0.0 |
201237.0 |
45142.1965 |
51988.8726 |
35475.500 |
| mexico |
284474.0 |
0.0 |
284474.0 |
56461.4675 |
31347.6224 |
13341.000 |
| michoacan |
134149.0 |
6713.0 |
127436.0 |
29750.4589 |
35081.6806 |
26622.500 |
| morelos |
36077.0 |
0.0 |
36077.0 |
6468.7874 |
2406.3403 |
133.000 |
| nayarit |
11254.0 |
0.0 |
11254.0 |
2878.9156 |
2337.1486 |
1261.000 |
| nuevo leon |
81924.0 |
404.0 |
81520.0 |
23709.7851 |
29212.2491 |
30140.000 |
| oaxaca |
187341.0 |
1010.0 |
186331.0 |
51409.7368 |
65867.4032 |
43324.500 |
| puebla |
292870.2 |
4240.0 |
288630.2 |
74611.8889 |
79630.7631 |
48354.000 |
| queretaro |
74808.4 |
0.0 |
74808.4 |
20777.9119 |
25499.7086 |
18273.000 |
| quintana roo |
64672.0 |
350.0 |
64322.0 |
19000.7413 |
21795.2380 |
17931.000 |
| san luis potosi |
152662.2 |
37590.0 |
115072.2 |
35708.2068 |
90717.3109 |
95791.500 |
| sinaloa |
416497.8 |
5046.0 |
411451.8 |
71139.0756 |
47302.9776 |
27423.500 |
| sonora |
24422.0 |
0.0 |
24422.0 |
5689.9027 |
4264.8015 |
2649.000 |
| tabasco |
34128.0 |
242.0 |
33886.0 |
8200.7811 |
8798.5000 |
5850.625 |
| tamaulipas |
88012.0 |
8466.0 |
79546.0 |
21456.4533 |
38187.3582 |
34609.150 |
| tlaxcala |
71459.0 |
0.0 |
71459.0 |
15066.7309 |
8802.8491 |
1498.000 |
| veracruz |
117580.0 |
5149.0 |
112431.0 |
29292.9785 |
54707.6020 |
49240.000 |
| yucatan |
171644.0 |
0.0 |
171644.0 |
45117.3412 |
32991.3453 |
13879.500 |
| zacatecas |
180285.0 |
1773.0 |
178512.0 |
53379.0982 |
68336.0388 |
69056.000 |
maize %>%
ggplot(aes(state, ag_losses)) +
geom_boxplot() +
ylab("Losses (ha)") +
xlab("State") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust= 0.5))
## Warning: Removed 83 rows containing non-finite values (stat_boxplot).

number_obs <- maize %>%
group_by(state) %>%
summarise(obs = sum(!is.na(ag_losses)))
## `summarise()` ungrouping output (override with `.groups` argument)
maize_complete <- number_obs %>%
filter(obs > 34)
maize_complete
## # A tibble: 16 x 2
## state obs
## <chr> <int>
## 1 campeche 35
## 2 chiapas 35
## 3 chihuahua 35
## 4 coahuila 35
## 5 colima 35
## 6 distrito federal 35
## 7 durango 35
## 8 hidalgo 35
## 9 morelos 35
## 10 nayarit 35
## 11 nuevo leon 35
## 12 puebla 35
## 13 queretaro 35
## 14 quintana roo 35
## 15 tlaxcala 35
## 16 veracruz 35
maize_ts <- maize %>%
ggplot(aes(year, ag_losses)) +
geom_line()+
ylab("Losses (ha)") +
xlab("Years") +
ggtitle("maize - Losses (planted-harvested) (ha) 1980 - 2016") +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
geom_rect(data = subset(maize, state %in% c(maize_complete$state)),
fill = NA, colour = "red", xmin = -Inf,xmax = Inf,
ymin = -Inf,ymax = Inf) +
facet_wrap(~state, scales="free_y", ncol=5)
#facet_wrap(~state, ncol=5)
maize_ts

Yields ~ Time
Maize trends at the national level
m <- maize_nat %>%
ggplot(aes(year, ag_yield)) +
geom_point() +
geom_smooth(method = "lm", se = T) +
stat_cor(label.y = 4)+
ylab("Yield (tonnes/ha)") +
xlab("Years") +
ggtitle("Maize Yields") #+
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1))
## List of 1
## $ axis.text.x:List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : num 1
## ..$ vjust : num 0.5
## ..$ angle : num 45
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi FALSE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## - attr(*, "class")= chr [1:2] "theme" "gg"
## - attr(*, "complete")= logi FALSE
## - attr(*, "validate")= logi TRUE
m
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing non-finite values (stat_cor).
## Warning: Removed 2 rows containing missing values (geom_point).

Maize trends at the state level
m <- maize %>%
ggplot(aes(year, ag_yield)) +
geom_point() +
geom_smooth(method = "lm", se = T) +
stat_cor()+
ylab("Yield (tonnes/ha)") +
xlab("Years") +
ggtitle("Maize Yields") +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
geom_rect(data = subset(maize, state %in% c(maize_complete$state)),
fill = NA, colour = "red", xmin = -Inf,xmax = Inf,
ymin = -Inf,ymax = Inf) +
facet_wrap(~state, scales="free_y", ncol=5)
m
## Warning: Removed 68 rows containing non-finite values (stat_smooth).
## Warning: Removed 68 rows containing non-finite values (stat_cor).
## Warning: Removed 68 rows containing missing values (geom_point).

Yields ~ Temperature
tmax_summary <- read.csv("/Users/erikaluna/R\ Studio/climate_data/tmax_summary.csv")
tmin_summary <- read.csv("/Users/erikaluna/R\ Studio/climate_data/tmin_summary.csv")
maize_yields_tmax <- left_join(maize, tmax_summary, by = c("state" = "state", "year" = "year"))
maize_yields_tmin <- left_join(maize, tmin_summary, by = c("state" = "state", "year" = "year"))
Maize yields ~ temperature at the national level
y <- maize_yields_tmax %>%
ggplot(aes(mean_tmax, ag_yield)) +
geom_point() +
geom_smooth(method = "lm", se = T) +
ylab("Yield (tonnes/ha)") +
xlab("Temperature (ºC) ") +
ggtitle("Maize Yields ~ Max Temperature") #+
y
## Warning: Removed 943 rows containing non-finite values (stat_smooth).
## Warning: Removed 943 rows containing missing values (geom_point).

y <- maize_yields_tmin %>%
ggplot(aes(mean_tmin, ag_yield)) +
geom_point() +
geom_smooth(method = "lm", se = T) +
ylab("Yield (tonnes/ha)") +
xlab("Temperature (ºC) ") +
ggtitle("Maize Yields ~ Min Temperature") #+
y
## Warning: Removed 950 rows containing non-finite values (stat_smooth).
## Warning: Removed 950 rows containing missing values (geom_point).

Maize yields ~ temperature at the state level
y <- maize_yields_tmax %>%
filter(state %in% c("baja california","baja california sur", "chihuahua", "durango", "nayarit", "sinaloa", "sonora")) %>%
ggplot(aes(mean_tmax, ag_yield)) +
geom_point() +
geom_smooth(method = "lm", se = T) +
ylab("Yield (tonnes/ha)") +
xlab("Temperature (ºC) ") +
ggtitle("Maize Yields ~ Max Temperature") +
#theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
facet_wrap(~state, scales="free_y")
y
## Warning: Removed 18 rows containing non-finite values (stat_smooth).
## Warning: Removed 18 rows containing missing values (geom_point).

yt <- maize_yields_tmin %>%
filter(state %in% c("baja california","baja california sur", "chihuahua", "durango", "nayarit", "sinaloa", "sonora")) %>%
ggplot(aes(mean_tmin, ag_yield)) +
geom_point() +
geom_smooth(method = "lm", se = T) +
ylab("Yield (tonnes/ha)") +
xlab("Temperature (ºC)") +
ggtitle("Maize Yields ~ Min Temperature") +
facet_wrap(~state, scales="free_y")
yt
## Warning: Removed 25 rows containing non-finite values (stat_smooth).
## Warning: Removed 25 rows containing missing values (geom_point).

Losses ~ Time
Maize losses trends at the national level
lt <- maize_nat %>%
ggplot(aes(year, ag_losses)) +
geom_point() +
geom_smooth(method = "lm", se = T) +
scale_y_continuous(labels = scales::comma) +
ylab("Losses (ha)") +
xlab("Years") +
ggtitle("Maize Losses") #+
#theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1))
lt
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).

Maize losses trends at the state level
lt <- maize %>%
ggplot(aes(year, ag_losses)) +
geom_point() +
geom_smooth(method = "lm", se = T) +
ylab("Losses (ha)") +
xlab("Years") +
ggtitle("Maize Losses") +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
geom_rect(data = subset(maize, state %in% c(maize_complete$state)),
fill = NA, colour = "red", xmin = -Inf,xmax = Inf,
ymin = -Inf,ymax = Inf) +
facet_wrap(~state, scales="free_y", ncol=5)
lt
## Warning: Removed 83 rows containing non-finite values (stat_smooth).
## Warning: Removed 83 rows containing missing values (geom_point).

Losses ~ Temperature
lt <- maize_yields_tmax %>%
filter(state %in% c("baja california","baja california sur", "chihuahua", "durango", "nayarit", "sinaloa", "sonora")) %>%
ggplot(aes(mean_tmax, ag_losses)) +
geom_point() +
geom_smooth(method = "lm", se = T) +
ylab("Losses (ha)") +
xlab("Temperature (ºC) ") +
ggtitle("Maize Losses ~ Max Temperature") +
#theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
facet_wrap(~state, scales="free_y")
lt
## Warning: Removed 21 rows containing non-finite values (stat_smooth).
## Warning: Removed 21 rows containing missing values (geom_point).

lt <- maize_yields_tmin %>%
filter(state %in% c("baja california","baja california sur", "chihuahua", "durango", "nayarit", "sinaloa", "sonora")) %>%
ggplot(aes(mean_tmin, ag_losses)) +
geom_point() +
geom_smooth(method = "lm", se = T) +
ylab("Losses (ha)") +
xlab("Temperature (ºC)") +
ggtitle("Maize Losses ~ Min Temperature") +
#theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
facet_wrap(~state, scales="free_y")
lt
## Warning: Removed 28 rows containing non-finite values (stat_smooth).
## Warning: Removed 28 rows containing missing values (geom_point).
